# NOTE: the global workspace is deliberately NOT cleared here.
# `rm(list = ls())` deletes the caller's objects when this file is source()d,
# yet does not give a clean session (attached packages, options and the
# working directory all survive). Every object used below is assigned or
# loaded by this script itself; run it in a fresh R session for a reproducible
# result.

library(openxlsx)
library(ggplot2)
library(ggthemes)
library(reshape2)
library(tokenizers)
library(dplyr)

#####################
# UNGA descriptives #
#####################
# Speaker metadata; the code below only relies on its `Year` column.
unga_meta <- read.xlsx("posts_of_speakers.xlsx")

# Hard-coded yearly counts for 1970-2017 (48 values), shown in the figure as
# the "Member States" series.
speakers <- data.frame(
  year = 1970:2017,
  n_members = c(
    127, 132, 132, 135, 138, 144, 147, 149, 151, 152,
    154, 157, 157, 158, 159, 159, 159, 159, 159, 159,
    159, 166, 179, 184, 185, 185, 185, 185, 185, 188,
    189, 189, 191, 191, 191, 191, 192, 192, 192, 192,
    192, 193, 193, 193, 193, 193, 193, 193
  )
)

# Figure 1: coverage plot
# Count the rows of `unga_meta` per year, attach the member counts, and
# reshape to long format (columns: Year, variable, value) so both series can be
# drawn from a single colour aesthetic.
speakers <- unga_meta %>%
  group_by(Year) %>%
  dplyr::count() %>%
  dplyr::rename(n_speakers = n) %>%
  left_join(speakers, by = c("Year" = "year")) %>%
  melt(id.vars = "Year")

# Relabel the series and put "Member States" first, so that the manual colour
# scale below maps it to red and "Speakers" to blue.
speakers$variable <- factor(
  speakers$variable,
  levels = c("n_members", "n_speakers"),
  labels = c("Member States", "Speakers")
)

ggplot(speakers) +
  geom_line(aes(x = Year, y = value, color = variable)) +
  labs(x = NULL, y = "Count") +
  # Scale limits: observations outside these ranges are not drawn.
  xlim(1972, 2016) +
  ylim(120, 200) +
  scale_color_manual(values = c("red", "blue")) +
  # The legend is hidden (see theme below); the lines are labelled directly.
  ggplot2::annotate(
    "text",
    x = 2008, y = 197.5,
    label = "Member States", color = "red", size = 5
  ) +
  ggplot2::annotate(
    "text",
    x = 2010.8, y = 185,
    label = "Speakers", color = "blue", size = 5
  ) +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Saves the most recently created plot (ggsave's default).
ggsave("Figure 1.png", width = 6, height = 4)

# Figure 2: proportion of border-related language

# All speech tiles, and the subset of tiles flagged as border related
# (rows with Predicted == 1). Both files must provide `year` and `text`.
full_unga_speeches <- read.csv("UNGAplenary_tiles_v5.csv", stringsAsFactors = FALSE)
undergrad_predicted <- read.csv("kw_relevance_predicted_set.csv", stringsAsFactors = FALSE)
undergrad_predicted <- undergrad_predicted %>% filter(Predicted == 1)

# Word count of every tile (tokenizers::count_words).
full_unga_speeches$n_words <- count_words(full_unga_speeches$text)
undergrad_predicted$n_words <- count_words(undergrad_predicted$text)

# Per-year totals. The table is anchored on the years present in the full
# corpus and the predicted counts are joined ONTO it. Previously the predicted
# summary was the left-hand table of the final join, so a year without any
# predicted tile was dropped from the table instead of entering the figure
# with a proportion of 0; the 1970:2017 year skeleton sat on the right-hand
# side of every join and therefore had no effect.
border_tiles <- full_unga_speeches %>%
  group_by(year) %>%
  dplyr::summarise(tiles_overall = n(), words_overall = sum(n_words)) %>%
  left_join(
    undergrad_predicted %>%
      group_by(year) %>%
      dplyr::summarise(tiles_predicted = n(), words_predicted = sum(n_words)),
    by = "year"
  )

# Years without a single predicted tile come out of the join as NA: they are
# genuine zeros. `tiles_predicted` is used as the "no match" marker for both
# columns, so it has to be filled last.
border_tiles$words_predicted[is.na(border_tiles$tiles_predicted)] <- 0
border_tiles$tiles_predicted[is.na(border_tiles$tiles_predicted)] <- 0

# Keep the column order the table had before this rewrite.
border_tiles <- border_tiles[, c("year", "words_predicted", "words_overall",
                                 "tiles_predicted", "tiles_overall")]

# Share of border-related language per year, by tile count and by word count.
border_tiles$predicted_prop_tiles <- border_tiles$tiles_predicted / border_tiles$tiles_overall
border_tiles$predicted_prop_words <- border_tiles$words_predicted / border_tiles$words_overall

# Border-related language versus territorial disputes.
# load() restores the objects stored in the .RData file; the code below expects
# one of them to be called `table` (masking base::table in this workspace) and
# to have a `year` column.
# NOTE(review): presumably one row per conquest attempt - confirm. The filter
# is strictly `> 1970`, so 1970 itself is excluded - confirm this is intended.
load("Conquest REP.RData")
table <- table %>%
  filter(year > 1970) %>%
  group_by(year) %>%
  dplyr::summarise(n = n())

ggplot(border_tiles) +
  # Smoothed yearly proportion of border-related tiles (primary y axis).
  geom_smooth(
    aes(x = year, y = predicted_prop_tiles),
    alpha = 1,
    fill = "grey85"
  ) +
  # geom_point(aes(year, predicted_prop_tiles)) +
  # Yearly counts from `table`, rescaled so that their maximum equals the
  # maximum proportion; the secondary axis below applies the inverse factor.
  geom_smooth(
    data = table,
    aes(
      x = year,
      y = n * (max(border_tiles$predicted_prop_tiles) / max(n))
    ),
    linetype = "dashed",
    color = "red"
  ) +
  labs(x = NULL, y = "Proportion Border Related") +
  # Direct line labels; the legend is switched off in theme().
  annotate(
    "text",
    label = c("UNGA border mentions", "Conquest attempts per year"),
    color = c("blue", "red"),
    x = 1995,
    y = c(0.04, 0.01)
  ) +
  scale_y_continuous(
    sec.axis = sec_axis(
      ~ . * max(table$n) / (max(border_tiles$predicted_prop_tiles)),
      name = "Conquest Attempts"
    )
  ) +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    # Colour each y axis like the series it belongs to.
    axis.title.y = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "red"),
    axis.text.y.left = element_text(color = "blue"),
    axis.text.y.right = element_text(color = "red")
  )

# Saves the most recently created plot (ggsave's default).
ggsave("Figure 2.png", width = 5, height = 4)
